# Load the survey responses. read.csv() already returns a data.frame, so no
# extra data.frame() coercion is needed before inspecting the structure.
clus_data <- read.csv("clus_data.csv")
str(clus_data)
## 'data.frame': 130 obs. of 43 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ SUBTIME : Factor w/ 125 levels "10.3.17 12:56",..: 114 115 116 117 118 119 120 121 122 123 ...
## $ JOBFUNC : int 5 3 4 6 4 6 3 2 6 1 ...
## $ JOBFUNCOTHER: Factor w/ 14 levels "","Account Manager",..: 1 1 1 14 1 9 1 1 3 1 ...
## $ JOBTIME : int 48 60 12 36 9999 60 7 36 12 36 ...
## $ GENDER : Factor w/ 2 levels "F","M": 2 2 2 2 2 NA 2 1 2 2 ...
## $ INVA : int 2 2 3 3 1 4 5 1 2 3 ...
## $ INVB : int NA 4 4 4 1 NA 5 5 4 5 ...
## $ INVC : int 4 2 3 3 1 NA 4 2 3 2 ...
## $ INVD : int NA 2 3 3 1 NA 5 2 2 4 ...
## $ INVE : int NA 4 3 3 5 NA 1 4 4 3 ...
## $ INVF : int 4 2 4 4 1 NA 4 3 3 4 ...
## $ INVG : int 2 2 1 2 5 NA 5 1 1 1 ...
## $ INVH : int NA 4 2 3 5 NA 1 1 2 3 ...
## $ INVI : int NA 4 2 4 5 NA 2 4 3 2 ...
## $ INVJ : int 2 5 2 1 2 NA 1 1 1 2 ...
## $ INVK : int NA NA NA 3 4 NA 2 1 1 2 ...
## $ INVL : int NA NA NA 3 5 NA 4 1 4 4 ...
## $ INVM : int 3 2 NA 2 4 NA 3 3 3 2 ...
## $ INVN : int NA 2 NA 2 3 NA 2 2 3 3 ...
## $ ROLE : Factor w/ 4 levels "Developer","Manager",..: 1 2 1 1 1 3 2 4 2 1 ...
## $ worktime : int 48 60 12 36 NA 60 7 36 12 36 ...
## $ age_range : Factor w/ 5 levels "20 or less","21-30",..: 5 4 3 4 3 3 3 4 4 4 ...
## $ gender : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 NA 2 1 2 2 ...
## $ team_size : Factor w/ 5 levels "11-20","3-5",..: 1 1 1 1 1 3 NA 2 3 2 ...
## $ USERA : int 5 3 5 5 3 4 4 5 5 3 ...
## $ USERB : int 2 5 2 3 5 5 4 1 1 4 ...
## $ USERC : int 5 2 3 4 2 4 4 4 5 2 ...
## $ USERD : int 3 2 3 4 2 2 3 5 4 1 ...
## $ USERE : int 4 3 4 4 2 2 4 5 4 1 ...
## $ USERF : int 4 2 4 5 2 3 3 5 4 1 ...
## $ INF.MNG : int 4 3 NA 4 3 3 3 4 5 2 ...
## $ INF.UX : int 4 4 NA NA 3 4 3 5 4 2 ...
## $ INF.DEV : int 4 2 NA 4 3 3 2 4 4 2 ...
## $ INF.SLF : int 4 2 4 4 2 3 3 5 5 5 ...
## $ condexp : Factor w/ 4 levels "Never","Occasionally",..: 4 1 2 4 2 4 2 4 4 4 ...
## $ X3.3A : int 4 5 4 3 1 5 3 4 4 4 ...
## $ X3.3B : int 3 2 4 4 5 2 5 5 2 4 ...
## $ X3.3C : int 5 5 4 4 1 5 3 4 4 4 ...
## $ X3.3D : int 3 2 4 4 4 4 5 5 2 4 ...
## $ X3.3E : int 3 5 4 2 1 5 5 2 4 4 ...
## $ X3.3F : int 3 3 4 4 4 3 5 4 1 3 ...
## $ COMPANY : Factor w/ 4 levels "Ericsson","F-secure",..: 1 1 1 1 1 1 1 1 1 1 ...
# Column positions of the 30 integer-coded questionnaire items used in the
# analysis (other numeric columns such as X, JOBFUNC, JOBTIME and worktime
# are deliberately left out).
cols <- c(
  7:20,  # INVA ... INVN
  26:35, # USERA ... USERF, INF.MNG ... INF.SLF
  37:42  # X3.3A ... X3.3F
)
clus_data_selected <- clus_data[cols]
str(clus_data_selected)
## 'data.frame': 130 obs. of 30 variables:
## $ INVA : int 2 2 3 3 1 4 5 1 2 3 ...
## $ INVB : int NA 4 4 4 1 NA 5 5 4 5 ...
## $ INVC : int 4 2 3 3 1 NA 4 2 3 2 ...
## $ INVD : int NA 2 3 3 1 NA 5 2 2 4 ...
## $ INVE : int NA 4 3 3 5 NA 1 4 4 3 ...
## $ INVF : int 4 2 4 4 1 NA 4 3 3 4 ...
## $ INVG : int 2 2 1 2 5 NA 5 1 1 1 ...
## $ INVH : int NA 4 2 3 5 NA 1 1 2 3 ...
## $ INVI : int NA 4 2 4 5 NA 2 4 3 2 ...
## $ INVJ : int 2 5 2 1 2 NA 1 1 1 2 ...
## $ INVK : int NA NA NA 3 4 NA 2 1 1 2 ...
## $ INVL : int NA NA NA 3 5 NA 4 1 4 4 ...
## $ INVM : int 3 2 NA 2 4 NA 3 3 3 2 ...
## $ INVN : int NA 2 NA 2 3 NA 2 2 3 3 ...
## $ USERA : int 5 3 5 5 3 4 4 5 5 3 ...
## $ USERB : int 2 5 2 3 5 5 4 1 1 4 ...
## $ USERC : int 5 2 3 4 2 4 4 4 5 2 ...
## $ USERD : int 3 2 3 4 2 2 3 5 4 1 ...
## $ USERE : int 4 3 4 4 2 2 4 5 4 1 ...
## $ USERF : int 4 2 4 5 2 3 3 5 4 1 ...
## $ INF.MNG: int 4 3 NA 4 3 3 3 4 5 2 ...
## $ INF.UX : int 4 4 NA NA 3 4 3 5 4 2 ...
## $ INF.DEV: int 4 2 NA 4 3 3 2 4 4 2 ...
## $ INF.SLF: int 4 2 4 4 2 3 3 5 5 5 ...
## $ X3.3A : int 4 5 4 3 1 5 3 4 4 4 ...
## $ X3.3B : int 3 2 4 4 5 2 5 5 2 4 ...
## $ X3.3C : int 5 5 4 4 1 5 3 4 4 4 ...
## $ X3.3D : int 3 2 4 4 4 4 5 5 2 4 ...
## $ X3.3E : int 3 5 4 2 1 5 5 2 4 4 ...
## $ X3.3F : int 3 3 4 4 4 3 5 4 1 3 ...
# Correlation matrix over all respondents; each coefficient uses every row
# where both variables of the pair are present.
cor_matrix <- cor(clus_data_selected, use = "pairwise.complete.obs")
# Complete-case version of the items (rows with any NA dropped), centred and
# scaled column by column.
clus_data_scaled <- scale(na.omit(clus_data_selected))
print(round(cor_matrix, 2))
## INVA INVB INVC INVD INVE INVF INVG INVH INVI INVJ INVK
## INVA 1.00 -0.09 0.55 0.46 -0.66 0.35 0.33 -0.01 -0.06 -0.04 -0.06
## INVB -0.09 1.00 -0.18 0.01 0.12 0.10 -0.19 -0.18 -0.13 -0.10 -0.19
## INVC 0.55 -0.18 1.00 0.48 -0.53 0.29 0.31 0.01 -0.05 0.01 0.03
## INVD 0.46 0.01 0.48 1.00 -0.56 0.22 0.34 -0.05 -0.09 0.03 -0.08
## INVE -0.66 0.12 -0.53 -0.56 1.00 -0.42 -0.39 0.03 0.03 0.04 0.13
## INVF 0.35 0.10 0.29 0.22 -0.42 1.00 0.33 -0.04 0.06 -0.19 -0.10
## INVG 0.33 -0.19 0.31 0.34 -0.39 0.33 1.00 0.11 0.08 -0.02 -0.02
## INVH -0.01 -0.18 0.01 -0.05 0.03 -0.04 0.11 1.00 0.23 0.23 0.44
## INVI -0.06 -0.13 -0.05 -0.09 0.03 0.06 0.08 0.23 1.00 0.10 0.14
## INVJ -0.04 -0.10 0.01 0.03 0.04 -0.19 -0.02 0.23 0.10 1.00 0.19
## INVK -0.06 -0.19 0.03 -0.08 0.13 -0.10 -0.02 0.44 0.14 0.19 1.00
## INVL -0.12 -0.01 -0.09 -0.17 0.33 -0.20 0.10 0.14 0.11 0.15 0.34
## INVM -0.06 -0.18 0.01 -0.09 0.06 -0.15 -0.05 0.22 0.14 0.07 0.43
## INVN -0.08 -0.10 -0.06 -0.05 0.17 -0.07 0.01 0.21 0.05 0.10 0.12
## USERA -0.13 0.08 -0.05 0.02 0.05 -0.03 -0.04 -0.02 0.00 -0.14 -0.14
## USERB 0.18 -0.14 -0.07 0.06 -0.02 0.03 0.15 -0.02 0.28 0.14 0.15
## USERC -0.12 0.09 0.04 -0.07 0.23 -0.07 -0.04 -0.11 -0.09 -0.23 -0.11
## USERD -0.16 0.13 0.07 -0.08 0.18 -0.30 -0.13 -0.18 -0.02 -0.14 -0.04
## USERE -0.05 0.17 0.13 0.00 -0.05 -0.03 0.04 -0.14 -0.15 -0.11 -0.05
## USERF -0.06 0.03 0.11 -0.10 0.12 -0.07 -0.06 -0.10 0.01 -0.19 -0.04
## INF.MNG -0.16 -0.06 0.11 0.05 0.11 0.13 0.02 -0.12 -0.08 -0.31 -0.19
## INF.UX -0.14 0.09 0.08 0.03 -0.02 0.11 0.01 -0.07 -0.02 -0.37 -0.14
## INF.DEV -0.12 0.05 0.08 0.09 0.03 0.13 0.08 -0.07 -0.06 -0.29 -0.21
## INF.SLF -0.13 0.21 0.10 0.00 0.11 -0.06 -0.10 -0.14 -0.24 -0.32 -0.14
## X3.3A -0.07 0.08 -0.12 -0.11 0.16 -0.03 -0.24 -0.11 -0.08 0.07 -0.08
## X3.3B 0.03 -0.03 -0.03 -0.10 0.03 -0.07 0.15 -0.03 0.12 -0.17 -0.06
## X3.3C 0.15 0.15 0.15 0.14 -0.12 0.06 -0.13 0.03 -0.03 -0.07 -0.08
## X3.3D 0.18 0.05 0.01 -0.16 -0.16 0.14 0.13 -0.17 0.16 -0.01 -0.09
## X3.3E 0.09 0.15 0.18 0.05 -0.01 0.11 -0.13 0.00 -0.06 -0.03 -0.07
## X3.3F 0.18 -0.05 0.06 0.10 -0.21 0.00 0.18 -0.01 -0.06 0.05 -0.11
## INVL INVM INVN USERA USERB USERC USERD USERE USERF INF.MNG
## INVA -0.12 -0.06 -0.08 -0.13 0.18 -0.12 -0.16 -0.05 -0.06 -0.16
## INVB -0.01 -0.18 -0.10 0.08 -0.14 0.09 0.13 0.17 0.03 -0.06
## INVC -0.09 0.01 -0.06 -0.05 -0.07 0.04 0.07 0.13 0.11 0.11
## INVD -0.17 -0.09 -0.05 0.02 0.06 -0.07 -0.08 0.00 -0.10 0.05
## INVE 0.33 0.06 0.17 0.05 -0.02 0.23 0.18 -0.05 0.12 0.11
## INVF -0.20 -0.15 -0.07 -0.03 0.03 -0.07 -0.30 -0.03 -0.07 0.13
## INVG 0.10 -0.05 0.01 -0.04 0.15 -0.04 -0.13 0.04 -0.06 0.02
## INVH 0.14 0.22 0.21 -0.02 -0.02 -0.11 -0.18 -0.14 -0.10 -0.12
## INVI 0.11 0.14 0.05 0.00 0.28 -0.09 -0.02 -0.15 0.01 -0.08
## INVJ 0.15 0.07 0.10 -0.14 0.14 -0.23 -0.14 -0.11 -0.19 -0.31
## INVK 0.34 0.43 0.12 -0.14 0.15 -0.11 -0.04 -0.05 -0.04 -0.19
## INVL 1.00 0.25 0.17 -0.07 0.05 0.16 0.07 0.06 0.06 -0.16
## INVM 0.25 1.00 0.06 -0.01 -0.08 0.02 0.04 0.00 0.10 -0.07
## INVN 0.17 0.06 1.00 -0.04 -0.04 0.02 0.06 -0.02 -0.05 -0.05
## USERA -0.07 -0.01 -0.04 1.00 -0.28 0.46 0.34 0.34 0.36 0.24
## USERB 0.05 -0.08 -0.04 -0.28 1.00 -0.51 -0.33 -0.35 -0.34 -0.14
## USERC 0.16 0.02 0.02 0.46 -0.51 1.00 0.50 0.44 0.54 0.30
## USERD 0.07 0.04 0.06 0.34 -0.33 0.50 1.00 0.59 0.71 0.19
## USERE 0.06 0.00 -0.02 0.34 -0.35 0.44 0.59 1.00 0.68 0.37
## USERF 0.06 0.10 -0.05 0.36 -0.34 0.54 0.71 0.68 1.00 0.35
## INF.MNG -0.16 -0.07 -0.05 0.24 -0.14 0.30 0.19 0.37 0.35 1.00
## INF.UX -0.16 0.06 -0.20 0.33 -0.36 0.26 0.35 0.27 0.39 0.34
## INF.DEV -0.09 0.03 0.03 0.39 -0.51 0.46 0.30 0.39 0.43 0.53
## INF.SLF 0.04 -0.01 -0.07 0.49 -0.53 0.62 0.44 0.46 0.49 0.37
## X3.3A 0.03 0.06 0.07 0.10 0.13 0.07 -0.08 -0.01 -0.01 0.15
## X3.3B 0.08 -0.12 -0.08 0.00 0.05 -0.03 0.11 0.00 0.05 -0.18
## X3.3C -0.09 -0.10 -0.04 0.14 0.01 0.12 0.15 0.11 0.12 0.19
## X3.3D -0.03 0.01 0.02 -0.06 0.01 -0.04 0.08 0.16 0.14 0.02
## X3.3E 0.08 -0.15 -0.07 0.03 -0.08 0.07 0.02 0.01 0.05 0.16
## X3.3F -0.17 -0.17 -0.18 0.12 -0.08 -0.04 0.10 0.13 0.10 -0.01
## INF.UX INF.DEV INF.SLF X3.3A X3.3B X3.3C X3.3D X3.3E X3.3F
## INVA -0.14 -0.12 -0.13 -0.07 0.03 0.15 0.18 0.09 0.18
## INVB 0.09 0.05 0.21 0.08 -0.03 0.15 0.05 0.15 -0.05
## INVC 0.08 0.08 0.10 -0.12 -0.03 0.15 0.01 0.18 0.06
## INVD 0.03 0.09 0.00 -0.11 -0.10 0.14 -0.16 0.05 0.10
## INVE -0.02 0.03 0.11 0.16 0.03 -0.12 -0.16 -0.01 -0.21
## INVF 0.11 0.13 -0.06 -0.03 -0.07 0.06 0.14 0.11 0.00
## INVG 0.01 0.08 -0.10 -0.24 0.15 -0.13 0.13 -0.13 0.18
## INVH -0.07 -0.07 -0.14 -0.11 -0.03 0.03 -0.17 0.00 -0.01
## INVI -0.02 -0.06 -0.24 -0.08 0.12 -0.03 0.16 -0.06 -0.06
## INVJ -0.37 -0.29 -0.32 0.07 -0.17 -0.07 -0.01 -0.03 0.05
## INVK -0.14 -0.21 -0.14 -0.08 -0.06 -0.08 -0.09 -0.07 -0.11
## INVL -0.16 -0.09 0.04 0.03 0.08 -0.09 -0.03 0.08 -0.17
## INVM 0.06 0.03 -0.01 0.06 -0.12 -0.10 0.01 -0.15 -0.17
## INVN -0.20 0.03 -0.07 0.07 -0.08 -0.04 0.02 -0.07 -0.18
## USERA 0.33 0.39 0.49 0.10 0.00 0.14 -0.06 0.03 0.12
## USERB -0.36 -0.51 -0.53 0.13 0.05 0.01 0.01 -0.08 -0.08
## USERC 0.26 0.46 0.62 0.07 -0.03 0.12 -0.04 0.07 -0.04
## USERD 0.35 0.30 0.44 -0.08 0.11 0.15 0.08 0.02 0.10
## USERE 0.27 0.39 0.46 -0.01 0.00 0.11 0.16 0.01 0.13
## USERF 0.39 0.43 0.49 -0.01 0.05 0.12 0.14 0.05 0.10
## INF.MNG 0.34 0.53 0.37 0.15 -0.18 0.19 0.02 0.16 -0.01
## INF.UX 1.00 0.56 0.45 -0.01 -0.01 0.16 -0.05 0.18 0.01
## INF.DEV 0.56 1.00 0.68 0.03 -0.03 0.20 0.03 0.02 0.02
## INF.SLF 0.45 0.68 1.00 0.02 0.03 0.22 0.02 0.12 -0.05
## X3.3A -0.01 0.03 0.02 1.00 -0.50 0.46 -0.08 0.20 -0.13
## X3.3B -0.01 -0.03 0.03 -0.50 1.00 -0.27 0.35 0.01 0.22
## X3.3C 0.16 0.20 0.22 0.46 -0.27 1.00 0.07 0.25 0.06
## X3.3D -0.05 0.03 0.02 -0.08 0.35 0.07 1.00 -0.06 0.32
## X3.3E 0.18 0.02 0.12 0.20 0.01 0.25 -0.06 1.00 0.02
## X3.3F 0.01 0.02 -0.05 -0.13 0.22 0.06 0.32 0.02 1.00
# Correlation plot with the variables reordered by hierarchical clustering
# of the coefficients (order = "hclust"): numbers in the lower triangle,
# circles in the upper triangle.
corrplot.mixed(
  cor_matrix,
  lower = "number",
  upper = "circle",
  order = "hclust"
)
# Simpler view of the same ordering, with rectangles drawn around 5 clusters.
corrplot(cor_matrix, order = "hclust", addrect = 5)
Get the statistically significant correlations (p < 0.05):
# Pairwise correlations and their p-values on the complete-case, scaled data.
correlations <- rcorr(as.matrix(clus_data_scaled))
p_values <- correlations$P

# Locate every cell with a defined p-value below 0.05. The matrix dimensions
# are taken from the data instead of a hard-coded 1:30, so this keeps working
# if the item selection changes.
significant <- which(!is.na(p_values) & p_values < 0.05, arr.ind = TRUE)

# which() reports cells column by column; reorder row by row so the listing
# groups all partners of one variable together.
row_major <- order(significant[, "row"], significant[, "col"])
significant <- significant[row_major, , drop = FALSE]

# One line per (row, column) cell. The matrix is symmetric, so each pair of
# variables is listed twice, once in each direction.
for (k in seq_len(nrow(significant))) {
  i <- significant[k, "row"]
  j <- significant[k, "col"]
  print(paste(rownames(p_values)[i], "-", colnames(p_values)[j], ": ", p_values[i, j]))
}
## [1] "INVA - INVC : 5.97600688858613e-05"
## [1] "INVA - INVD : 8.89989073225017e-05"
## [1] "INVA - INVE : 3.84155813826226e-08"
## [1] "INVA - INVF : 0.00198188375699071"
## [1] "INVA - INVG : 0.00181498402122715"
## [1] "INVA - USERA : 0.0353299558107456"
## [1] "INVA - USERB : 0.0036006742456467"
## [1] "INVA - USERC : 0.0312598398747386"
## [1] "INVA - USERD : 0.0199488647352783"
## [1] "INVA - INF.SLF : 0.0412554118716966"
## [1] "INVB - INVH : 0.0219413904112731"
## [1] "INVB - INVK : 0.0258895287357712"
## [1] "INVB - INVM : 0.027868034378528"
## [1] "INVB - USERE : 0.0480961857970503"
## [1] "INVB - INF.SLF : 0.00873403508279336"
## [1] "INVB - X3.3E : 0.014731131345026"
## [1] "INVC - INVA : 5.97600688858613e-05"
## [1] "INVC - INVD : 6.46935275350202e-07"
## [1] "INVC - INVE : 0.000311494372586818"
## [1] "INVC - INVF : 0.0141054671985286"
## [1] "INVC - INVG : 0.0082405581477345"
## [1] "INVC - X3.3E : 0.0193337474844781"
## [1] "INVC - X3.3F : 0.0418212014638715"
## [1] "INVD - INVA : 8.89989073225017e-05"
## [1] "INVD - INVC : 6.46935275350202e-07"
## [1] "INVD - INVE : 3.49190905080121e-08"
## [1] "INVD - INVG : 0.000897161776227229"
## [1] "INVE - INVA : 3.84155813826226e-08"
## [1] "INVE - INVC : 0.000311494372586818"
## [1] "INVE - INVD : 3.49190905080121e-08"
## [1] "INVE - INVF : 0.00108847486518449"
## [1] "INVE - INVG : 0.00458732361555048"
## [1] "INVE - INVL : 0.00135806748739675"
## [1] "INVE - USERC : 0.00602840850829445"
## [1] "INVE - USERD : 0.0213030996404555"
## [1] "INVE - INF.SLF : 0.024334039746599"
## [1] "INVF - INVA : 0.00198188375699071"
## [1] "INVF - INVC : 0.0141054671985286"
## [1] "INVF - INVE : 0.00108847486518449"
## [1] "INVF - INVJ : 0.00708013807871888"
## [1] "INVF - X3.3E : 0.02474032352352"
## [1] "INVG - INVA : 0.00181498402122715"
## [1] "INVG - INVC : 0.0082405581477345"
## [1] "INVG - INVD : 0.000897161776227229"
## [1] "INVG - INVE : 0.00458732361555048"
## [1] "INVG - USERB : 0.0468498083014208"
## [1] "INVG - X3.3A : 0.0162703862893996"
## [1] "INVH - INVB : 0.0219413904112731"
## [1] "INVH - INVI : 0.00582602853296277"
## [1] "INVH - INVK : 8.93195067575014e-06"
## [1] "INVH - USERD : 0.0337626978285754"
## [1] "INVH - USERE : 0.00422267591765935"
## [1] "INVH - USERF : 0.031166497787726"
## [1] "INVH - X3.3D : 0.00816333431166094"
## [1] "INVI - INVH : 0.00582602853296277"
## [1] "INVI - USERB : 0.00251543695502021"
## [1] "INVI - INF.SLF : 0.0187977431290931"
## [1] "INVJ - INVF : 0.00708013807871888"
## [1] "INVJ - INF.MNG : 0.018037167998306"
## [1] "INVJ - INF.UX : 0.000932404587634883"
## [1] "INVJ - INF.DEV : 0.0282445092149526"
## [1] "INVJ - INF.SLF : 0.0284109796666756"
## [1] "INVK - INVB : 0.0258895287357712"
## [1] "INVK - INVH : 8.93195067575014e-06"
## [1] "INVK - INVL : 0.0333737968074144"
## [1] "INVK - INVM : 7.15269456774692e-05"
## [1] "INVK - INF.SLF : 0.0125418629938503"
## [1] "INVL - INVE : 0.00135806748739675"
## [1] "INVL - INVK : 0.0333737968074144"
## [1] "INVL - INVM : 0.0384085570445376"
## [1] "INVL - X3.3F : 0.00791216426280061"
## [1] "INVM - INVB : 0.027868034378528"
## [1] "INVM - INVK : 7.15269456774692e-05"
## [1] "INVM - INVL : 0.0384085570445376"
## [1] "USERA - INVA : 0.0353299558107456"
## [1] "USERA - USERB : 0.0147671851418938"
## [1] "USERA - USERC : 4.54691560456411e-05"
## [1] "USERA - USERD : 0.000301950446309762"
## [1] "USERA - USERE : 0.00345745114598728"
## [1] "USERA - USERF : 4.37381683067173e-05"
## [1] "USERA - INF.MNG : 0.01183380490133"
## [1] "USERA - INF.UX : 0.00310787721493977"
## [1] "USERA - INF.DEV : 0.00708466025786381"
## [1] "USERA - INF.SLF : 0.00162382361741198"
## [1] "USERA - X3.3C : 0.0422436146046841"
## [1] "USERB - INVA : 0.0036006742456467"
## [1] "USERB - INVG : 0.0468498083014208"
## [1] "USERB - INVI : 0.00251543695502021"
## [1] "USERB - USERA : 0.0147671851418938"
## [1] "USERB - USERC : 8.27741867270859e-07"
## [1] "USERB - USERD : 0.00147875266834863"
## [1] "USERB - USERE : 0.00301677745822593"
## [1] "USERB - USERF : 0.00754573091907584"
## [1] "USERB - INF.MNG : 0.0334260803798894"
## [1] "USERB - INF.UX : 0.0012839123771009"
## [1] "USERB - INF.DEV : 1.76837703369515e-06"
## [1] "USERB - INF.SLF : 4.08943029284181e-06"
## [1] "USERB - X3.3B : 0.0401008241631553"
## [1] "USERC - INVA : 0.0312598398747386"
## [1] "USERC - INVE : 0.00602840850829445"
## [1] "USERC - USERA : 4.54691560456411e-05"
## [1] "USERC - USERB : 8.27741867270859e-07"
## [1] "USERC - USERD : 8.66380357233965e-07"
## [1] "USERC - USERE : 8.19424107234568e-07"
## [1] "USERC - USERF : 2.56253194219624e-08"
## [1] "USERC - INF.MNG : 0.000319903050952863"
## [1] "USERC - INF.DEV : 0.000281923057243727"
## [1] "USERC - INF.SLF : 1.14681810714501e-08"
## [1] "USERC - X3.3B : 0.0270920551486193"
## [1] "USERD - INVA : 0.0199488647352783"
## [1] "USERD - INVE : 0.0213030996404555"
## [1] "USERD - INVH : 0.0337626978285754"
## [1] "USERD - USERA : 0.000301950446309762"
## [1] "USERD - USERB : 0.00147875266834863"
## [1] "USERD - USERC : 8.66380357233965e-07"
## [1] "USERD - USERE : 1.06399908217369e-07"
## [1] "USERD - USERF : 5.32056620983212e-11"
## [1] "USERD - INF.MNG : 0.0188418689054513"
## [1] "USERD - INF.UX : 0.000371729940110477"
## [1] "USERD - INF.DEV : 0.01249398339277"
## [1] "USERD - INF.SLF : 0.00129820106570566"
## [1] "USERE - INVB : 0.0480961857970503"
## [1] "USERE - INVH : 0.00422267591765935"
## [1] "USERE - USERA : 0.00345745114598728"
## [1] "USERE - USERB : 0.00301677745822593"
## [1] "USERE - USERC : 8.19424107234568e-07"
## [1] "USERE - USERD : 1.06399908217369e-07"
## [1] "USERE - USERF : 2.65121258280487e-12"
## [1] "USERE - INF.MNG : 9.9427145272557e-06"
## [1] "USERE - INF.UX : 0.00796522493967777"
## [1] "USERE - INF.DEV : 0.00075510222656483"
## [1] "USERE - INF.SLF : 0.000937177695052416"
## [1] "USERE - X3.3D : 0.035609753575728"
## [1] "USERF - INVH : 0.031166497787726"
## [1] "USERF - USERA : 4.37381683067173e-05"
## [1] "USERF - USERB : 0.00754573091907584"
## [1] "USERF - USERC : 2.56253194219624e-08"
## [1] "USERF - USERD : 5.32056620983212e-11"
## [1] "USERF - USERE : 2.65121258280487e-12"
## [1] "USERF - INF.MNG : 5.55148949468176e-05"
## [1] "USERF - INF.UX : 6.6553414268089e-05"
## [1] "USERF - INF.DEV : 1.54202987576735e-05"
## [1] "USERF - INF.SLF : 0.000128866693251961"
## [1] "USERF - X3.3D : 0.045621714906551"
## [1] "INF.MNG - INVJ : 0.018037167998306"
## [1] "INF.MNG - USERA : 0.01183380490133"
## [1] "INF.MNG - USERB : 0.0334260803798894"
## [1] "INF.MNG - USERC : 0.000319903050952863"
## [1] "INF.MNG - USERD : 0.0188418689054513"
## [1] "INF.MNG - USERE : 9.9427145272557e-06"
## [1] "INF.MNG - USERF : 5.55148949468176e-05"
## [1] "INF.MNG - INF.UX : 0.00218455141592488"
## [1] "INF.MNG - INF.DEV : 3.4459147535415e-08"
## [1] "INF.MNG - INF.SLF : 5.92143742239593e-06"
## [1] "INF.UX - INVJ : 0.000932404587634883"
## [1] "INF.UX - USERA : 0.00310787721493977"
## [1] "INF.UX - USERB : 0.0012839123771009"
## [1] "INF.UX - USERD : 0.000371729940110477"
## [1] "INF.UX - USERE : 0.00796522493967777"
## [1] "INF.UX - USERF : 6.6553414268089e-05"
## [1] "INF.UX - INF.MNG : 0.00218455141592488"
## [1] "INF.UX - INF.DEV : 7.0037015276192e-07"
## [1] "INF.UX - INF.SLF : 0.000454229630998171"
## [1] "INF.DEV - INVJ : 0.0282445092149526"
## [1] "INF.DEV - USERA : 0.00708466025786381"
## [1] "INF.DEV - USERB : 1.76837703369515e-06"
## [1] "INF.DEV - USERC : 0.000281923057243727"
## [1] "INF.DEV - USERD : 0.01249398339277"
## [1] "INF.DEV - USERE : 0.00075510222656483"
## [1] "INF.DEV - USERF : 1.54202987576735e-05"
## [1] "INF.DEV - INF.MNG : 3.4459147535415e-08"
## [1] "INF.DEV - INF.UX : 7.0037015276192e-07"
## [1] "INF.DEV - INF.SLF : 1.16981029307794e-08"
## [1] "INF.SLF - INVA : 0.0412554118716966"
## [1] "INF.SLF - INVB : 0.00873403508279336"
## [1] "INF.SLF - INVE : 0.024334039746599"
## [1] "INF.SLF - INVI : 0.0187977431290931"
## [1] "INF.SLF - INVJ : 0.0284109796666756"
## [1] "INF.SLF - INVK : 0.0125418629938503"
## [1] "INF.SLF - USERA : 0.00162382361741198"
## [1] "INF.SLF - USERB : 4.08943029284181e-06"
## [1] "INF.SLF - USERC : 1.14681810714501e-08"
## [1] "INF.SLF - USERD : 0.00129820106570566"
## [1] "INF.SLF - USERE : 0.000937177695052416"
## [1] "INF.SLF - USERF : 0.000128866693251961"
## [1] "INF.SLF - INF.MNG : 5.92143742239593e-06"
## [1] "INF.SLF - INF.UX : 0.000454229630998171"
## [1] "INF.SLF - INF.DEV : 1.16981029307794e-08"
## [1] "INF.SLF - X3.3C : 0.0130908794779845"
## [1] "X3.3A - INVG : 0.0162703862893996"
## [1] "X3.3A - X3.3B : 3.66904409077051e-08"
## [1] "X3.3A - X3.3C : 0.00177939176236919"
## [1] "X3.3B - USERB : 0.0401008241631553"
## [1] "X3.3B - USERC : 0.0270920551486193"
## [1] "X3.3B - X3.3A : 3.66904409077051e-08"
## [1] "X3.3B - X3.3C : 0.00528980566625714"
## [1] "X3.3B - X3.3D : 0.0310954820639853"
## [1] "X3.3C - USERA : 0.0422436146046841"
## [1] "X3.3C - INF.SLF : 0.0130908794779845"
## [1] "X3.3C - X3.3A : 0.00177939176236919"
## [1] "X3.3C - X3.3B : 0.00528980566625714"
## [1] "X3.3D - INVH : 0.00816333431166094"
## [1] "X3.3D - USERE : 0.035609753575728"
## [1] "X3.3D - USERF : 0.045621714906551"
## [1] "X3.3D - X3.3B : 0.0310954820639853"
## [1] "X3.3D - X3.3F : 0.00726066188896657"
## [1] "X3.3E - INVB : 0.014731131345026"
## [1] "X3.3E - INVC : 0.0193337474844781"
## [1] "X3.3E - INVF : 0.02474032352352"
## [1] "X3.3F - INVC : 0.0418212014638715"
## [1] "X3.3F - INVL : 0.00791216426280061"
## [1] "X3.3F - X3.3D : 0.00726066188896657"
# Three heuristics for choosing the number of k-means clusters on the scaled
# data: within-cluster sum of squares ("wss", with a dashed reference line at
# k = 4), average silhouette width, and the gap statistic (50 bootstrap
# samples, 25 random starts per k-means run).
fviz_nbclust(clus_data_scaled, FUNcluster = kmeans, method = "wss") +
  geom_vline(xintercept = 4, linetype = 2) +
  labs(subtitle = "Elbow method")

fviz_nbclust(clus_data_scaled, FUNcluster = kmeans, method = "silhouette") +
  labs(subtitle = "Silhouette method")

fviz_nbclust(
  clus_data_scaled,
  FUNcluster = kmeans,
  nstart = 25,
  method = "gap_stat",
  nboot = 50
) +
  labs(subtitle = "Gap statistic method")

# Majority vote across the NbClust indices for 2 to 7 k-means clusters.
nb <- NbClust(
  data = clus_data_scaled,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 7,
  method = "kmeans"
)
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 9 proposed 2 as the best number of clusters
## * 3 proposed 3 as the best number of clusters
## * 2 proposed 4 as the best number of clusters
## * 2 proposed 5 as the best number of clusters
## * 4 proposed 6 as the best number of clusters
## * 4 proposed 7 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 2
##
##
## *******************************************************************
# Summarise and plot how many indices voted for each number of clusters.
fviz_nbclust(x = nb)
## Among all indices:
## ===================
## * 2 proposed 0 as the best number of clusters
## * 9 proposed 2 as the best number of clusters
## * 3 proposed 3 as the best number of clusters
## * 2 proposed 4 as the best number of clusters
## * 2 proposed 5 as the best number of clusters
## * 4 proposed 6 as the best number of clusters
## * 4 proposed 7 as the best number of clusters
##
## Conclusion
## =========================
## * According to the majority rule, the best number of clusters is 2 .
# 20-step diverging palette (dark blue -> white -> dark orange) for the
# correlation heat map.
col <- colorRampPalette(colors = c("darkblue", "white", "darkorange"))(n = 20)
# symm = TRUE: the correlation matrix is square and treated symmetrically.
heatmap(cor_matrix, col = col, symm = TRUE)
alternative views:
# chart.Correlation() computes the pairwise correlations (and the histograms
# and scatterplots) itself, so it must be given the item responses, not the
# already-computed correlation matrix; otherwise it plots correlations of
# correlation coefficients.
suppressWarnings(
  chart.Correlation(clus_data_selected, histogram = TRUE, pch = 19)
)
# Agglomerative clustering of the rows of the scaled data: Euclidean
# distances, merged with the "ward.D2" linkage.
d2 <- dist(clus_data_scaled, method = "euclidean")
hcl2 <- hclust(d2, method = "ward.D2")
plot(hcl2, cex = 0.5)
# Cut the tree into 3 groups; groups2 holds one cluster label per row.
groups2 <- cutree(hcl2, k = 3)
another heatmap
# Heat map of the scaled responses with a side bar marking cluster membership.
# One colour per cluster found by cutree().
clusterCols <- rainbow(length(unique(groups2)))
# Map every row to the colour of its cluster for the side bar.
myClusterSideBar <- clusterCols[groups2]
# Reversed 75-step red/green palette for the cells.
myheatcol <- rev(redgreen(75))
# Rows follow the hierarchical-clustering dendrogram; columns keep their
# original order (Colv = NA) and values are rescaled within each row.
heatmap.2(
  clus_data_scaled,
  main = "Hierarchical Cluster",
  Rowv = as.dendrogram(hcl2),
  Colv = NA,
  dendrogram = "row",
  scale = "row",
  col = myheatcol,
  density.info = "none",
  trace = "none",
  RowSideColors = myClusterSideBar
)
# Principal component analysis of the respondents' (complete-case, scaled)
# answers. princomp() expects observations in rows; passing cor_matrix as the
# data treated the 30 variables as 30 observations, which is why the scores
# were indexed by variable name and the last component had ~zero variance.
# NOTE: any output recorded below this call was produced by the earlier call
# on cor_matrix and has to be regenerated.
pc <- princomp(clus_data_scaled, cor = TRUE)
summary(pc)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 3.1840537 2.4792743 1.7806525 1.50580066 1.08722833
## Proportion of Variance 0.3379399 0.2048934 0.1056908 0.07558119 0.03940218
## Cumulative Proportion 0.3379399 0.5428333 0.6485241 0.72410527 0.76350745
## Comp.6 Comp.7 Comp.8 Comp.9
## Standard deviation 1.04840432 0.97569395 0.89673343 0.84882538
## Proportion of Variance 0.03663839 0.03173262 0.02680436 0.02401682
## Cumulative Proportion 0.80014584 0.83187846 0.85868282 0.88269964
## Comp.10 Comp.11 Comp.12 Comp.13
## Standard deviation 0.79877547 0.72752661 0.64416638 0.61753041
## Proportion of Variance 0.02126808 0.01764317 0.01383168 0.01271146
## Cumulative Proportion 0.90396771 0.92161088 0.93544256 0.94815402
## Comp.14 Comp.15 Comp.16 Comp.17
## Standard deviation 0.57605786 0.497954750 0.459936557 0.386392513
## Proportion of Variance 0.01106142 0.008265298 0.007051388 0.004976639
## Cumulative Proportion 0.95921544 0.967480735 0.974532123 0.979508762
## Comp.18 Comp.19 Comp.20 Comp.21
## Standard deviation 0.376864769 0.349848494 0.271638792 0.246975211
## Proportion of Variance 0.004734235 0.004079799 0.002459588 0.002033225
## Cumulative Proportion 0.984242997 0.988322796 0.990782384 0.992815609
## Comp.22 Comp.23 Comp.24 Comp.25
## Standard deviation 0.244604714 0.224296828 0.187234720 0.1557082002
## Proportion of Variance 0.001994382 0.001676969 0.001168561 0.0008081681
## Cumulative Proportion 0.994809991 0.996486960 0.997655521 0.9984636895
## Comp.26 Comp.27 Comp.28 Comp.29
## Standard deviation 0.1335082979 0.1124799368 0.0962377620 0.0796957195
## Proportion of Variance 0.0005941489 0.0004217245 0.0003087236 0.0002117136
## Cumulative Proportion 0.9990578383 0.9994795628 0.9997882864 1.0000000000
## Comp.30
## Standard deviation 2.026708e-08
## Proportion of Variance 1.369182e-17
## Cumulative Proportion 1.000000e+00
# Show the loadings of each variable on each principal component.
pc$loadings
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9
## INVA 0.114 0.340 -0.102 0.173 -0.203
## INVB -0.150 0.143 0.348 -0.318 -0.120 0.397
## INVC 0.319 -0.308 0.179 -0.169 -0.155
## INVD 0.330 -0.223 0.135 -0.116 0.180
## INVE -0.370 0.154
## INVF 0.320 -0.343 0.105 -0.144
## INVG 0.115 0.268 -0.197 -0.188 -0.112 0.201
## INVH 0.184 -0.130 -0.299 -0.176 0.390 -0.178
## INVI 0.173 -0.141 -0.429 0.234 -0.204 -0.102
## INVJ 0.226 -0.105 0.381 0.126 0.217
## INVK 0.190 -0.205 -0.269 -0.240 -0.103
## INVL -0.284 0.162 -0.269 -0.336 -0.120
## INVM -0.216 -0.361 0.190 -0.239 -0.333 0.217
## INVN -0.196 -0.135 0.653 -0.455
## USERA -0.267 0.286 0.122
## USERB 0.268 0.159 -0.135 0.106
## USERC -0.282 -0.115
## USERD -0.262 -0.163 0.215
## USERE -0.271 -0.136 0.212 0.101 -0.130
## USERF -0.275 -0.155 0.103 0.132 -0.131
## INF.MNG -0.254 -0.278 0.135 -0.153
## INF.UX -0.269 -0.113 -0.243 -0.234
## INF.DEV -0.284 -0.159 -0.182
## INF.SLF -0.300
## X3.3A -0.106 0.470 0.311 -0.123
## X3.3B -0.456 0.249 -0.241 -0.126 -0.176
## X3.3C -0.127 0.101 0.376 0.105 0.308 -0.143 -0.165
## X3.3D 0.129 -0.295 0.318 0.366 -0.358 -0.210
## X3.3E 0.293 0.127 -0.376 -0.401 -0.551
## X3.3F 0.214 -0.229 0.207 0.305 0.277 -0.164 0.386
## Comp.10 Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16 Comp.17
## INVA 0.182 -0.216 0.101
## INVB 0.426 0.230 -0.115 0.167 0.143 -0.163
## INVC 0.128 -0.129 -0.105 -0.204 0.105
## INVD -0.117 0.186 -0.121 0.235 0.122 -0.445
## INVE -0.136 -0.109
## INVF 0.268 0.241 -0.245 -0.351 0.104
## INVG -0.150 -0.113 0.433 0.104 0.199 0.277 0.183
## INVH 0.395 0.151 0.293 -0.138 0.116
## INVI 0.629 0.196 -0.127 -0.145 0.109 -0.194
## INVJ 0.111 0.175 -0.472 -0.298 0.227 -0.238 0.126
## INVK 0.129 -0.102 0.391 -0.189 -0.247
## INVL -0.154 0.533 0.169 0.177 0.109 -0.122
## INVM 0.210 -0.224 -0.164 -0.329 0.227 -0.142
## INVN 0.222 0.123 -0.235 0.285
## USERA 0.156 0.211 0.261 -0.277 -0.442 -0.237
## USERB -0.375 0.113 0.252
## USERC 0.230 0.111 -0.118 -0.169 -0.193
## USERD -0.115 0.289 -0.154 0.134 0.221 0.160
## USERE 0.106 -0.276 0.280
## USERF 0.149 -0.180 0.160 -0.169 0.174
## INF.MNG -0.252 -0.248 -0.262 0.221 0.130 -0.145 -0.409
## INF.UX -0.118 0.209 0.354 0.360
## INF.DEV 0.105 -0.107 0.314
## INF.SLF 0.155 0.121 -0.267
## X3.3A -0.132 0.172 0.131 -0.133 -0.134 0.101 0.154
## X3.3B -0.171 0.289 -0.194
## X3.3C 0.159 0.212 0.387 0.273 0.316 -0.107
## X3.3D 0.310 -0.128 0.140 -0.167 -0.136
## X3.3E -0.195 -0.186 0.207 -0.167
## X3.3F -0.260 0.160 0.367 -0.177
## Comp.18 Comp.19 Comp.20 Comp.21 Comp.22 Comp.23 Comp.24 Comp.25
## INVA 0.233 -0.222 0.143 0.391 0.251 -0.234 -0.145
## INVB 0.182 0.174 0.176 0.109
## INVC -0.161 0.303 0.516 -0.272 -0.206 0.139
## INVD -0.146 -0.315 0.104 0.380 0.243
## INVE -0.222 0.124 0.127 0.175
## INVF -0.113 -0.272 -0.216 -0.191 -0.350
## INVG 0.226 0.108 0.445 -0.174 -0.231
## INVH 0.138 0.411 0.227 0.158
## INVI -0.223 -0.175
## INVJ 0.193 -0.172 -0.117 -0.103
## INVK -0.503 0.214 -0.108 0.239 -0.193 -0.101
## INVL -0.120 -0.302 0.273 0.177 0.194
## INVM 0.280 -0.115 -0.307 -0.152
## INVN -0.102
## USERA 0.414 -0.150 0.127 0.198 -0.131 0.187
## USERB 0.249 0.162 -0.189
## USERC -0.366 0.139 0.271 -0.387 0.142 0.176 -0.295
## USERD 0.163 -0.120 0.143 0.131
## USERE 0.320 0.109 -0.228 -0.330 -0.461
## USERF -0.241 -0.163 0.100 0.558
## INF.MNG 0.224 0.138 0.164
## INF.UX 0.148 -0.307 0.170 0.227 0.141 0.307 -0.191
## INF.DEV -0.118 -0.298 0.124 -0.171 -0.411 0.109
## INF.SLF 0.107 0.212 -0.299 0.158
## X3.3A 0.126 -0.542 0.208 0.236
## X3.3B 0.195 -0.164 -0.158 -0.270 -0.341
## X3.3C -0.126 -0.175 -0.345 0.156 -0.103 -0.143
## X3.3D 0.187 0.202 0.238 0.157 0.252 0.144
## X3.3E 0.173 -0.170 -0.164
## X3.3F -0.372 0.142
## Comp.26 Comp.27 Comp.28 Comp.29 Comp.30
## INVA 0.358 -0.131 0.133 -0.308
## INVB 0.127 0.231 -0.135
## INVC 0.162 -0.184
## INVD -0.157 -0.182
## INVE 0.352 -0.495 -0.168 -0.496
## INVF -0.167 -0.226
## INVG 0.114
## INVH -0.141 -0.180
## INVI 0.111
## INVJ 0.106 0.203 0.109 -0.267
## INVK 0.220
## INVL
## INVM -0.172
## INVN 0.147 -0.127
## USERA 0.126 0.120
## USERB -0.310 0.387 -0.397 -0.265
## USERC 0.386 -0.182 -0.124
## USERD 0.267 -0.632 -0.117 -0.190
## USERE -0.179 0.126 -0.255 -0.125 -0.111
## USERF 0.370 0.183 0.226 0.173
## INF.MNG 0.278 -0.112 0.199 0.282 -0.141
## INF.UX 0.125 0.174 -0.260
## INF.DEV 0.274 0.105 -0.520
## INF.SLF -0.546 -0.336 0.317 -0.244
## X3.3A 0.167 -0.130 -0.189
## X3.3B 0.215 0.193 0.147 -0.265
## X3.3C 0.161 0.109
## X3.3D -0.184
## X3.3E
## X3.3F -0.100 0.116 -0.157
##
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.033 0.033 0.033 0.033 0.033 0.033 0.033 0.033
## Cumulative Var 0.033 0.067 0.100 0.133 0.167 0.200 0.233 0.267
## Comp.9 Comp.10 Comp.11 Comp.12 Comp.13 Comp.14 Comp.15
## SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.033 0.033 0.033 0.033 0.033 0.033 0.033
## Cumulative Var 0.300 0.333 0.367 0.400 0.433 0.467 0.500
## Comp.16 Comp.17 Comp.18 Comp.19 Comp.20 Comp.21 Comp.22
## SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.033 0.033 0.033 0.033 0.033 0.033 0.033
## Cumulative Var 0.533 0.567 0.600 0.633 0.667 0.700 0.733
## Comp.23 Comp.24 Comp.25 Comp.26 Comp.27 Comp.28 Comp.29
## SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.033 0.033 0.033 0.033 0.033 0.033 0.033
## Cumulative Var 0.767 0.800 0.833 0.867 0.900 0.933 0.967
## Comp.30
## SS loadings 1.000
## Proportion Var 0.033
## Cumulative Var 1.000
# Scree plot of the component variances, drawn as a line; the bend suggests
# roughly 3 main components (tentative reading).
plot(pc, type = "lines")
# Component scores stored in the fitted princomp object.
print(pc[["scores"]])
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## INVA 2.831152290 4.77279363 0.30845305 -0.46613008 0.98739139
## INVB -1.379029636 -0.17514652 1.57851215 3.21801748 0.38584706
## INVC 0.959867130 4.07703723 0.29965564 -2.24813572 0.94267387
## INVD 1.488516339 4.24265955 1.12151973 -1.46777907 0.72795728
## INVE -0.103974375 -5.91299105 0.35843192 1.86524207 -0.65590760
## INVF 1.316726933 4.00592294 0.92713235 0.05991851 -2.12715983
## INVG 2.719313801 3.24290345 -1.94798443 -1.22845583 -0.72503958
## INVH 3.866616310 -1.92487358 -0.05390742 -2.18402659 -0.28786675
## INVI 3.445436068 -1.25324820 -1.25272624 0.34115402 -2.52836962
## INVJ 4.701641851 -1.63739710 0.72257651 0.28973497 2.12019978
## INVK 4.117028018 -3.01891536 -0.42416327 -2.10342739 0.47881455
## INVL 2.309909363 -3.70933973 -0.60439490 -0.49257119 0.84099653
## INVM 2.117332377 -2.97710948 -0.59366797 -2.55621918 -0.17716093
## INVN 2.299848043 -2.55354216 0.33740835 -0.76100866 0.01875729
## USERA -3.577943602 -0.43145262 -0.18748076 -0.23554259 -0.21656299
## USERB 5.896119424 0.20451038 1.01884631 1.82764420 -1.07191999
## USERC -4.415563250 -1.31630874 -0.39687838 -0.74819208 0.34839043
## USERD -4.035208855 -1.43298699 -1.58295537 0.07331082 1.30750198
## USERE -4.077114586 0.04716503 -1.24776557 -0.37823996 1.27532901
## USERF -4.348561266 -0.73619523 -1.54059605 -0.59629128 0.56885234
## INF.MNG -3.489963782 0.65867308 1.09014145 -0.40080614 -1.71699741
## INF.UX -3.911857413 0.67804164 0.03242728 -0.57522672 -1.60103286
## INF.DEV -4.586613812 0.47237181 -0.22243760 -1.18121621 -1.26710915
## INF.SLF -5.325163916 -0.37547215 -0.08713402 -0.48906437 0.12568367
## X3.3A 0.006817647 -1.51304353 4.83657953 1.04241160 0.05228136
## X3.3B 1.470455357 0.54362417 -4.30855258 2.51090950 -0.41553139
## X3.3C -1.290231950 1.13688056 3.66853415 0.64982352 0.57140677
## X3.3D 0.914071768 1.45620258 -2.65604235 2.82507966 -0.02144642
## X3.3E -0.482239126 0.83804669 2.81065610 1.31211552 0.41115101
## X3.3F 0.562612849 2.59118969 -2.00418762 2.09697119 1.64887019
## Comp.6 Comp.7 Comp.8 Comp.9 Comp.10
## INVA -0.06081099 0.09840902 -0.85675144 -0.0008950916 0.06796906
## INVB -1.97273756 0.55220956 -0.41375493 2.1291422781 1.69016496
## INVC -0.46773298 -0.33746358 -0.67684441 -0.6064180480 -0.52698857
## INVD -0.88023942 0.61840164 0.65569360 1.1581185532 -0.67298070
## INVE -0.78811889 0.41298304 0.62859761 0.2748037165 -0.81462340
## INVF -0.67489797 0.67907266 -0.55091709 0.3085032681 1.04479886
## INVG -0.46637197 1.18815610 0.23146105 0.2545315161 -0.72967016
## INVH -0.20047936 -0.80841802 2.02966745 -0.6369541027 1.58584256
## INVI 1.20359691 -0.97026058 -0.05847765 -0.3007174965 -0.15409412
## INVJ 0.53894003 0.31344673 1.28816163 0.6351430539 -0.31141596
## INVK -0.18221377 -1.20369060 -0.38133739 0.3887538639 0.52102416
## INVL -1.62529699 0.08571437 -1.56038626 -0.4485880124 -0.67997688
## INVM 0.95989603 -1.09716023 -1.48085594 1.3041111187 0.76112688
## INVN -0.06459077 3.54424682 0.39297759 -1.8212128247 0.86109445
## USERA 0.39175995 -0.05598646 1.54736047 0.7413947369 -0.15086267
## USERB 0.45982497 0.02676292 -0.27855294 0.8053983739 -1.92330366
## USERC -0.47737880 0.51751403 -0.16013601 -0.0843482495 -0.36415392
## USERD 0.38872909 -0.01817889 -0.33925280 -0.0564150663 -0.55618491
## USERE 0.52068532 0.49569548 -0.59050556 0.2637842284 -0.20602785
## USERF 0.82204813 -0.39550744 -0.64948246 -0.2684493084 -0.46964531
## INF.MNG 0.69002546 0.34596318 0.34278976 -0.5700195122 -1.20632546
## INF.UX -0.40240376 -1.33824571 0.44638782 0.4841757599 0.22386100
## INF.DEV 0.17290048 0.60869915 0.52478737 0.0127292815 0.41951767
## INF.SLF -0.78031493 0.07420855 -0.06006900 0.2415652903 0.28247843
## X3.3A 1.61114088 0.35345531 -0.46808242 0.1374194048 -0.43839160
## X3.3B -1.53491941 -0.55048113 0.21433357 -0.5004678307 -0.27097028
## X3.3C 1.63307761 -0.73608920 -0.19487059 -0.6844619739 0.67444425
## X3.3D 1.97962155 0.40163504 -1.56852710 -0.7709262101 1.27702420
## X3.3E -2.21229399 -2.02062624 -0.04425482 -2.3594005626 -0.08946972
## X3.3F 1.41855515 -0.78446549 2.03084091 -0.0303001547 0.15573868
## Comp.11 Comp.12 Comp.13 Comp.14 Comp.15
## INVA 0.09838343 -0.353850709 0.621404439 -0.195719049 -0.61031585
## INVB 0.71226930 -0.005524616 -0.413657765 0.467701331 0.17290488
## INVC 0.35845136 -0.608352227 -0.383166285 -0.203498325 0.05961091
## INVD 0.58131372 -0.687619706 0.279576826 -0.213606201 0.60723608
## INVE -0.70112840 -0.486975556 -0.087636300 0.172158334 0.06195715
## INVF -0.44324437 0.586665799 -0.867475291 0.152801903 -0.99148536
## INVG -0.57676876 1.395335355 0.244520501 0.301638495 0.52239399
## INVH 0.17667143 0.322314355 0.115438787 0.890900765 0.01843435
## INVI 2.38750103 0.559063354 -0.427388557 -0.522969113 0.15279354
## INVJ 0.21797518 0.305652073 -1.602108081 -1.033025233 0.58261793
## INVK -0.55025047 -0.353885747 0.004526343 1.156158832 -0.52248061
## INVL -0.27335079 1.739050693 0.546830813 -0.003552179 0.48711654
## INVM -1.03752216 -0.808362009 0.170246322 -1.095665723 0.04327917
## INVN 0.33946081 -1.084300010 0.231135441 -0.191415821 -0.24456475
## USERA 0.54627975 0.604758060 0.869933654 -0.883420074 -1.23889302
## USERB 0.14674921 -0.576508352 0.409985097 0.704553666 -0.27523920
## USERC 0.20780010 0.717682543 0.405347590 -0.417298203 -0.51041419
## USERD 1.14373794 -0.726491197 -0.106876569 0.393675586 -0.02537341
## USERE 0.17693583 0.309875718 -0.937345976 0.886811678 -0.26420765
## USERF 0.55774136 -0.004682708 -0.629123751 0.485901636 -0.47598340
## INF.MNG -1.12135859 -0.366326344 -0.896276775 0.635754550 0.28545867
## INF.UX -0.24471616 -0.645157594 -0.095361911 -0.221982230 0.49646757
## INF.DEV -0.42402148 0.078734698 0.051073425 -0.345885534 0.86303551
## INF.SLF -0.21767478 0.007527774 0.605582998 -0.231940908 0.32420154
## X3.3A -0.66935807 0.326473209 0.439009489 -0.485629072 -0.37025930
## X3.3B -0.09841495 -0.889735090 0.911616440 -0.224114732 0.07879089
## X3.3C 0.75867636 0.230924270 1.261887249 0.847431010 0.84690468
## X3.3D -0.58556851 0.084015371 -0.219676462 -0.344684593 0.39227063
## X3.3E -0.22492016 -0.003820452 -0.667388292 -0.604499961 -0.23891366
## X3.3F -1.24164914 0.333519045 0.165366601 0.123419167 -0.22734361
## Comp.16 Comp.17 Comp.18 Comp.19 Comp.20
## INVA -0.26457073 0.33585517 -0.241042971 0.46391231 -0.336104657
## INVB 0.27446881 -0.23462800 -0.073094843 0.39235423 0.250249870
## INVC -0.55411918 0.28422497 -0.365863559 -0.08570465 0.447917641
## INVD 0.25879927 -0.82399179 -0.137409275 -0.09050311 -0.221532042
## INVE -0.32076752 0.34388097 -0.592010711 0.04347945 0.123225843
## INVF -0.21916072 0.32630580 -0.308982014 -0.50197757 -0.308126099
## INVG 0.65280497 0.44992454 0.406270182 0.21212115 0.658067374
## INVH -0.36793460 0.31346165 0.229001811 0.81530806 -0.106238465
## INVI 0.20136199 -0.38801635 -0.511004552 0.06012134 0.003110075
## INVJ -0.70036398 0.41130095 0.291749924 -0.29814441 -0.085985585
## INVK -0.23788242 -0.46245664 -0.060506828 -0.93343442 0.294698596
## INVL 0.30204398 -0.24804161 -0.092696559 -0.22770267 -0.471259022
## INVM 0.47979991 -0.21251808 0.059921163 0.58218396 0.019432656
## INVN 0.66711877 -0.10502826 0.050760288 -0.18882423 -0.019645873
## USERA -0.17876145 -0.46321642 0.820525331 -0.26944843 0.174538905
## USERB -0.05634242 0.06768326 0.382874623 0.01505327 -0.080716876
## USERC -0.50538010 0.10381546 -0.788438418 0.28164249 0.386704762
## USERD 0.55915054 0.35036826 -0.075936651 -0.11702849 0.245712640
## USERE 0.11101491 -0.16357285 0.648444995 0.20871361 -0.329351180
## USERF 0.20247674 0.34937587 -0.029528049 0.19119128 -0.364934990
## INF.MNG -0.40302700 -0.78496425 0.008504698 0.45333661 0.108765273
## INF.UX 0.83898438 0.85484384 0.240461138 -0.53007145 -0.019251953
## INF.DEV -0.21433354 -0.07334574 -0.128374406 -0.21417651 -0.455065590
## INF.SLF -0.77597838 -0.08206468 0.034144016 -0.16234005 0.028213825
## X3.3A 0.20653705 0.43166044 0.060805259 0.14366412 -0.052415089
## X3.3B -0.58438370 0.20672135 0.310385844 0.09982061 -0.265745985
## X3.3C -0.27659376 0.11172064 0.030246464 -0.23328744 0.019903223
## X3.3D -0.43244287 -0.27029431 0.333965179 -0.14004646 0.319913556
## X3.3E 0.51496747 -0.34343664 0.353929055 0.12334779 0.143232945
## X3.3F 0.82251357 -0.28556753 -0.857101133 -0.09356040 -0.107313777
## Comp.21 Comp.22 Comp.23 Comp.24 Comp.25
## INVA 0.192424831 0.526735072 0.32058461 -0.2138462295 -0.172506936
## INVB 0.246748180 -0.003799826 -0.05035590 -0.0741004539 0.051390866
## INVC 0.679967109 -0.378658498 -0.24990458 0.0957067537 0.095316001
## INVD -0.429567463 0.126423167 -0.01807699 0.3962371690 0.160680310
## INVE 0.030551677 -0.070598845 0.16414573 0.0002149144 0.045126109
## INVF -0.262683472 -0.455374188 0.11680265 0.0463403748 0.027860672
## INVG -0.225589513 0.004364650 -0.10271355 -0.2254965042 0.074107382
## INVH 0.073626861 0.102345005 0.04601934 0.2375993276 0.106924066
## INVI 0.115937771 0.126187425 -0.23360866 -0.0843088585 -0.083330193
## INVJ -0.153685799 -0.026569810 0.14224634 -0.0856085966 -0.078387065
## INVK -0.147658765 0.325121345 -0.22780127 -0.0941097945 -0.005106212
## INVL 0.384207249 -0.120584438 0.22538868 0.2090042644 0.016922150
## INVM -0.159416309 -0.413409341 0.08982140 -0.1434978467 -0.055419775
## INVN 0.089645095 -0.021405559 0.01526874 -0.0534434355 -0.069459079
## USERA 0.275504944 -0.190940303 0.22818979 0.0271559669 0.065419679
## USERB 0.042188972 0.117786462 0.17980760 -0.1763073315 -0.027528957
## USERC -0.513099632 0.187451515 0.04212705 0.1962611275 -0.282301170
## USERD -0.107937755 -0.169238431 0.17566216 -0.0403035437 0.078145762
## USERE 0.036657387 -0.053011922 -0.39462551 0.0329397183 -0.408021322
## USERF -0.211610054 0.076497782 0.12112920 -0.0398911212 0.484364862
## INF.MNG 0.177253325 -0.100889290 0.22013842 0.1028026851 -0.065338164
## INF.UX 0.213221135 0.311490272 0.18370160 0.3299926209 -0.221201114
## INF.DEV 0.005169748 0.137271173 -0.20017670 -0.4052923270 0.077059624
## INF.SLF 0.158994609 0.285736507 -0.08311084 -0.2788780620 0.099958103
## X3.3A 0.031810148 0.172383810 -0.65670035 0.2269153372 0.153476654
## X3.3B -0.219088888 -0.334141990 -0.41103155 0.1057620323 -0.039067064
## X3.3C -0.249420236 -0.481608313 0.19936158 -0.0954310684 -0.129221615
## X3.3D -0.014827398 0.320932563 0.20364362 0.2845506900 0.121919382
## X3.3E -0.258587171 0.100383209 0.01597969 -0.1956370405 0.012296292
## X3.3F 0.199263414 -0.100879203 -0.06191230 -0.0853307684 -0.034079248
## Comp.26 Comp.27 Comp.28 Comp.29 Comp.30
## INVA 0.231320805 0.017527270 -0.110087642 0.0681087559 6.522560e-16
## INVB 0.087766688 0.051203773 0.114662042 0.0189312987 5.800915e-15
## INVC -0.060680309 0.101713313 0.028392339 -0.0806389611 3.851086e-15
## INVD -0.061031547 0.018860477 -0.112256783 -0.0208744792 -7.188694e-15
## INVE -0.007096329 0.177998096 -0.302973539 -0.0408435967 -5.273559e-16
## INVF -0.063629302 -0.130008839 -0.053262558 -0.0023371005 -4.017620e-15
## INVG 0.040941531 0.048187887 -0.062996836 0.0581473719 2.775558e-16
## INVH -0.055974107 -0.104903381 -0.004120704 -0.0786110080 -2.803313e-15
## INVI -0.018662160 -0.005144985 -0.061818022 0.0648694814 -6.245005e-16
## INVJ 0.046142365 -0.023889233 0.074232062 0.0668999501 3.124237e-15
## INVK 0.158594113 0.035128780 0.027358937 0.0399996866 2.262079e-15
## INVL 0.041115228 -0.055104422 0.042868840 -0.0003555607 1.998401e-15
## INVM -0.078856398 0.001031558 0.003207370 0.0017212211 -1.942890e-15
## INVN -0.077663787 0.047747813 0.071146841 0.0531186572 8.604228e-16
## USERA 0.101976720 0.078917879 -0.037594245 -0.0241750340 1.304512e-15
## USERB -0.245283711 -0.041017193 0.161064830 -0.1474116624 2.206568e-15
## USERC -0.053263293 0.038126658 0.182431984 -0.0715124860 1.984524e-15
## USERD 0.188108671 -0.383240378 -0.067493916 -0.0743182012 -6.064593e-15
## USERE -0.129974979 0.075338556 -0.142382874 -0.0511007719 -2.518818e-15
## USERF -0.060063177 0.231224126 0.104522925 0.0906141408 3.705369e-15
## INF.MNG 0.186812767 -0.086275400 0.081804016 0.1350053234 1.658396e-15
## INF.UX -0.037481751 0.061402350 0.057091928 0.0449003764 5.561523e-15
## INF.DEV 0.210111947 0.042049622 0.058473722 -0.2191280867 -4.669876e-15
## INF.SLF -0.401510629 -0.208742321 -0.057545780 0.1410491098 -1.637579e-15
## X3.3A 0.104994274 -0.102601228 -0.006020209 0.0299893297 -2.053913e-15
## X3.3B 0.136861801 -0.002328814 0.067938885 0.0848584242 6.661338e-16
## X3.3C 0.002752827 0.116479003 -0.014465584 0.0523909185 -7.459311e-17
## X3.3D -0.036424933 0.013142838 -0.052027733 -0.0916977037 -4.163336e-16
## X3.3E -0.040574223 0.010514039 -0.033038616 -0.0550159374 -2.563921e-15
## X3.3F -0.109329104 -0.023337843 0.042888321 0.0074165436 2.220446e-15
# Biplot of the PCA object.
biplot(x = pc)
# Eigenvalue table for `pc`: eigenvalue, percentage of variance and
# cumulative percentage of variance per dimension (auto-printed).
get_eigenvalue(pc)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 1.013820e+01 3.379399e+01 33.79399
## Dim.2 6.146801e+00 2.048934e+01 54.28333
## Dim.3 3.170723e+00 1.056908e+01 64.85241
## Dim.4 2.267436e+00 7.558119e+00 72.41053
## Dim.5 1.182065e+00 3.940218e+00 76.35074
## Dim.6 1.099152e+00 3.663839e+00 80.01458
## Dim.7 9.519787e-01 3.173262e+00 83.18785
## Dim.8 8.041308e-01 2.680436e+00 85.86828
## Dim.9 7.205045e-01 2.401682e+00 88.26996
## Dim.10 6.380423e-01 2.126808e+00 90.39677
## Dim.11 5.292950e-01 1.764317e+00 92.16109
## Dim.12 4.149503e-01 1.383168e+00 93.54426
## Dim.13 3.813438e-01 1.271146e+00 94.81540
## Dim.14 3.318427e-01 1.106142e+00 95.92154
## Dim.15 2.479589e-01 8.265298e-01 96.74807
## Dim.16 2.115416e-01 7.051388e-01 97.45321
## Dim.17 1.492992e-01 4.976639e-01 97.95088
## Dim.18 1.420271e-01 4.734235e-01 98.42430
## Dim.19 1.223940e-01 4.079799e-01 98.83228
## Dim.20 7.378763e-02 2.459588e-01 99.07824
## Dim.21 6.099675e-02 2.033225e-01 99.28156
## Dim.22 5.983147e-02 1.994382e-01 99.48100
## Dim.23 5.030907e-02 1.676969e-01 99.64870
## Dim.24 3.505684e-02 1.168561e-01 99.76555
## Dim.25 2.424504e-02 8.081681e-02 99.84637
## Dim.26 1.782447e-02 5.941489e-02 99.90578
## Dim.27 1.265174e-02 4.217245e-02 99.94796
## Dim.28 9.261707e-03 3.087236e-02 99.97883
## Dim.29 6.351408e-03 2.117136e-02 100.00000
## Dim.30 4.107545e-16 1.369182e-15 100.00000
library(nFactors)
# Eigen-decomposition of the correlation matrix; its eigenvalues feed the
# scree tests below.
ev <- eigen(x = cor_matrix)
# Parallel analysis sized to the complete cases of the selected variables
# (rows = subjects, columns = variables), 100 replications, 5% centile.
ap <- nFactors::parallel(
  subject = nrow(na.omit(clus_data_selected)),
  var = ncol(na.omit(clus_data_selected)),
  rep = 100,
  cent = 0.05
)
# Combine the observed eigenvalues with the simulated ones and plot the
# resulting scree-test solutions.
nS <- nScree(
  x = ev[["values"]],
  aparallel = ap[["eigen"]][["qevpea"]]
)
plotnScree(nS)
# Train/test split (80/20) ----
# Splits clus_data into a random 80% training set and a 20% test set and
# keeps the COMPANY labels of each part for later classification.
# NOTE(review): no set.seed() call is visible in this part of the file, so the
# split will differ between runs unless a seed is set earlier -- confirm.

# number of rows in the dataset
n <- nrow(clus_data)
n
## [1] 130
# choose randomly 80% of the rows; floor() keeps the sample size an integer
# for any n (for n = 130 this is 104, as before)
ind <- sample(n, size = floor(n * 0.8))
# create train set
training <- clus_data[ind, ]
# create test set
testing <- clus_data[-ind, ]
# save the correct classes of the test rows BEFORE dropping the column.
# (Previously this stored clus_data$COMPANY, i.e. the labels of all n rows,
# which cannot be compared element-wise with predictions for the test set.)
test_classes <- testing$COMPANY
# remove the class column from the test data
testing <- dplyr::select(testing, -COMPANY)
# OR like below: classes of the training rows, as a factor
train_classes <- factor(training$COMPANY)
# training labels taken through the index vector. (Previously this used
# [-ind], which returned the labels of the TEST rows under a training name.)
train.def <- clus_data$COMPANY[ind]
# company_pred <- knn(train = training, test = testing, cl = train.def, k=3)
# however the function above does not work. Is it because of NA's in the data?
# NOTE(review): probably several reasons -- confirm before re-enabling:
#   * the call originally passed `train_def`, which is never defined
#     (the object is `train.def`);
#   * `training` still contains the COMPANY column while `testing` does not,
#     so the two sets have different columns;
#   * clus_data contains NA values and factor columns (see str() output at
#     the top of the file); if knn() is class::knn(), it rejects missing
#     values and expects numeric predictors.
# remove the company variable from test data
# test <- dplyr::select(test, -COMPANY)
# linear discriminant analysis
# lda.fit <- lda(age_range ~ , data = train)
# print the lda.fit object
# lda.fit